library(Seurat)
library(SnapATAC)
library(tidyverse)
library(DescTools) # 4 AUC function
library(glue)
library(ggalluvial) # 4 river plot
library(ggpubr)
source("~/multiOmic_benchmark/utils.R")
source("~/multiOmic_benchmark/KNN_agreement.R")
gg_color_hue <- function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
## Make output directory
outdir <- "~/multiOmic_benchmark/report/output/20191113_labelTransferEDA_F74_v2/"
ifelse(!dir.exists(outdir), dir.create(outdir), FALSE)
[1] FALSE
# model.cca <- readRDS("~/models/modelCCA_union_hvg_F74_SCElist_20191113.RDS")
# model.liger <- readRDS("~/models/modelLiger_union_hvg_F74_SCElist_20191113.RDS")
# model.conos <- readRDS("~/models/modelConos_union_hvg_F74_SCElist_20191113.RDS")
seu.cca <- readRDS("~/models/labelTransferCCA_union_hvg_F74_SCElist_20191119.RDS")
seu.liger <- readRDS("~/models/labelTransferLiger_union_hvg_F74_SCElist_20191119.RDS")
seu.conos <- readRDS("~/models/labelTransferConos_union_hvg_F74_SCElist_20191119.RDS")
integrate_features <- scan("~/intFeatures_union_hvg_2000_F74_SCElist_20191113.txt", what='')
Read 10603 items
int.list <- list(CCA=seu.cca, Liger=seu.liger, Conos=seu.conos)
# ## Make method color palette
# method.palette <- brewer_palette_4_values(names(int.list), "Set1")
Visualize label transfer on original ATAC data (embedded SnapATAC bins)
## Load original data
orig.ATAC <- readRDS("~/my_data/cellranger-atac110_count_30439_WSSS8038360_GRCh38-1_1_0.snapATAC.RDS")
sce.list <- readRDS("~/my_data/integrated_thymus/F74_SCElist_20191119.RDS")
orig.RNA <- sce.list$RNA
## Make SeuratObjects
atac.seu <- snapToSeurat(
obj=orig.ATAC,
eigs.dims=1:20,
norm=TRUE,
scale=TRUE
)
Epoch: checking input parameters ...
Non-unique features (rownames) present in the input matrix, making uniquePerforming log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
|
| | 0%
|
|==== | 3%
|
|======= | 6%
|
|=========== | 9%
|
|============== | 12%
|
|================== | 15%
|
|====================== | 18%
|
|========================= | 21%
|
|============================= | 24%
|
|================================ | 26%
|
|==================================== | 29%
|
|======================================= | 32%
|
|=========================================== | 35%
|
|=============================================== | 38%
|
|================================================== | 41%
|
|====================================================== | 44%
|
|========================================================= | 47%
|
|============================================================= | 50%
|
|================================================================= | 53%
|
|==================================================================== | 56%
|
|======================================================================== | 59%
|
|=========================================================================== | 62%
|
|=============================================================================== | 65%
|
|=================================================================================== | 68%
|
|====================================================================================== | 71%
|
|========================================================================================== | 74%
|
|============================================================================================= | 76%
|
|================================================================================================= | 79%
|
|==================================================================================================== | 82%
|
|======================================================================================================== | 85%
|
|============================================================================================================ | 88%
|
|=============================================================================================================== | 91%
|
|=================================================================================================================== | 94%
|
|====================================================================================================================== | 97%
|
|==========================================================================================================================| 100%
atac.seu <- RenameCells(atac.seu, new.names = orig.ATAC@metaData$barcode)
## Add cell type predictions
getPredictedLabels <- function(seu.int, int.name, id.col="predicted.id", score.col="score", filter_score=0){
pred.df <- seu.int$ATAC@meta.data[,c(id.col, score.col), drop=F]
colnames(pred.df) <- c('predicted.id', "score")
pred.df <- pred.df %>%
rownames_to_column("cell") %>%
mutate(predicted.id = ifelse(score < filter_score, NA, as.character(predicted.id))) %>%
column_to_rownames("cell")
rownames(pred.df) <- str_remove(rownames(pred.df), "^ATAC_")
colnames(pred.df) <- c(str_c("predicted.id", "_", int.name), str_c("score", "_", int.name))
pred.df
}
pred.cca <- getPredictedLabels(seu.cca, "CCA", score.col = "prediction.score.max")
pred.liger <- getPredictedLabels(seu.liger, "Liger")
pred.conos <- getPredictedLabels(seu.conos, "Conos")
if (all(rownames(pred.conos) == rownames(pred.cca)) & all(rownames(pred.conos) == rownames(pred.liger))) {
atac.seu <- AddMetaData(atac.seu, metadata = cbind(pred.cca, pred.liger, pred.conos))
} else {
stop("Non corresponding cell names")
}
Filter low confidence calls
ggpubr::ggarrange(
plotlist = list(
DimPlot(atac.seu, reduction = "umap.snap", group.by = "predicted.id_CCA", cols=cell.type.pal) +
scale_color_manual(values = cell.type.pal, na.value="grey80") +
ggtitle("CCA"),
DimPlot(atac.seu, reduction = "umap.snap", group.by = "predicted.id_Liger", cols=cell.type.pal) +
scale_color_manual(values = cell.type.pal, na.value="grey80") + ggtitle("Liger"),
DimPlot(atac.seu, reduction = "umap.snap", group.by = "predicted.id_Conos", cols=cell.type.pal) +
scale_color_manual(values = cell.type.pal, na.value="grey80") + ggtitle("Conos"),
DimPlot(orig.RNA.seu, reduction = "umap", group.by = "annotation", cols=cell.type.pal) + ggtitle("RNA") +
scale_color_manual(values = cell.type.pal, na.value="grey80")
),
common.legend = TRUE, ncol=4, nrow=1
) +
ggsave(paste0(outdir, "umap_labels_filtered.png"), width=16, height = 6)
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
pl <- DimPlot(atac.seu, reduction = "umap.snap", group.by = "predicted.id_CCA", cols=cell.type.pal, label=TRUE, repel=TRUE) + ggtitle("CCA")
plotly::ggplotly(pl)
geom_GeomTextRepel() has yet to be implemented in plotly.
If you'd like to see this geom implemented,
Please open an issue with your example code at
https://github.com/ropensci/plotly/issues
pl <- DimPlot(atac.seu, reduction = "umap.snap", group.by = "predicted.id_Conos", cols=cell.type.pal, label=TRUE, repel=TRUE) + ggtitle("Conos")
plotly::ggplotly(pl)
geom_GeomTextRepel() has yet to be implemented in plotly.
If you'd like to see this geom implemented,
Please open an issue with your example code at
https://github.com/ropensci/plotly/issues
orig.RNA.seu <- as.Seurat(orig.RNA)
orig.RNA.seu <- FindVariableFeatures(orig.RNA.seu)
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
orig.RNA.seu <- ScaleData(orig.RNA.seu)
Centering and scaling data matrix
|
| | 0%
|
|==================================================== | 50%
|
|=======================================================================================================| 100%
orig.RNA.seu <- RunPCA(orig.RNA.seu)
PC_ 1
Positive: TRBC2, TRBC1, HMGA1, HIST1H1C, HIST1H3H, ITM2A, HIST1H2BJ, SMIM24, TRAV13-2, FXYD2
TRBV7-2, PCGF5, HIST1H2BH, IL32, HIST1H2BN, CHAC1, RASD1, TRBV27, TRAV13-1, TRAV8-2
TRAV8-4, PTPN6, SELL, HIST1H2BG, TAGAP, TRDC, TRAV38-2DV8, TRAV29DV5, TRBV9, TRAV41
Negative: CALD1, COL5A2, COL6A1, COL6A2, SPARC, THY1, DCN, COL3A1, NFIB, SPARCL1
TSHZ2, CPE, PLAC9, NID1, FKBP10, PTN, FLRT2, MAP1B, EFEMP2, BGN
CXCL12, RBP1, LAMB1, AHNAK, COL1A1, COL5A1, FSTL1, LUM, LAMA4, MDK
PC_ 2
Positive: SFRP1, NTRK2, PLAT, ISLR, NRK, SCARA5, ASPN, OSR1, OLFML3, MXRA8
CAPN6, PTPRD, PLP1, TMEFF2, CREB3L1, DKK3, CERCAM, MMP2, EBF2, SMOC2
CDO1, COL12A1, PDGFRA, LRRC17, THBS2, HTRA3, SFRP2, ANGPTL1, MAB21L1, MXRA5
Negative: MKI67, CDK1, NUSAP1, TOP2A, CCNA2, RRM2, UBE2C, BIRC5, KIFC1, TYMS
UBE2T, AURKB, CENPF, CENPM, CDCA8, TACC3, NCAPG, TPX2, ASF1B, CDKN3
GTSE1, CDCA3, HJURP, SPC25, MAD2L1, CDC20, PLK1, DLGAP5, NUF2, KIF22
PC_ 3
Positive: CCNA2, NUF2, GTSE1, CDCA8, UBE2T, AURKB, PBK, CDK1, NCAPG, NDC80
KIFC1, CDCA3, HJURP, MAD2L1, SPC25, PLK1, KIF15, DEPDC1B, BIRC5, CDCA5
CKS1B, RRM2, DLGAP5, HMMR, CENPA, KIF22, KIF20A, CDCA2, CENPF, KIF2C
Negative: HLA-DRB5, HLA-DRA, TYROBP, HLA-DRB1, HLA-DPA1, HLA-DPB1, HLA-DQA1, C1QC, C1QB, RNASE1
HLA-DQB1, A2M, C1QA, CSF1R, STAB1, HCK, HLA-DMB, FOLR2, SAMHD1, LYZ
MS4A7, SPI1, CD36, MS4A6A, CYBB, TMEM176B, CD74, IGSF6, MPEG1, MS4A4A
PC_ 4
Positive: GYPA, GYPB, NFE2, GYPE, ANK1, SLC4A1, RHAG, AHSP, DMTN, KLF1
GATA1, TMOD1, TMEM56, HBZ, HBG1, GMPR, C17orf99, SMIM5, HBQ1, TSPO2
ALAS2, PHOSPHO1, CR1L, TRIM58, HBM, EPB42, RHD, RHCE, SPTA1, SMIM1
Negative: TMSB10, TRBC2, CCL21, IL32, CXCL13, TRBC1, APLNR, CCL19, MIF, HMGB1
COX4I2, COL4A1, COL15A1, MADCAM1, FDCSP, CCL17, NTS, TNC, CDH5, FABP4
CAV1, COL4A2, CRIP2, RGS5, KLRB1, MYLK, IFITM1, IL33, PAPLN, HPN
PC_ 5
Positive: APLNR, CDH5, CAV1, COL15A1, CRIP2, COL4A1, CCL21, KDR, CLDN5, MADCAM1
FABP4, IL33, COL4A2, CXCL13, TM4SF1, PODXL, CCL19, COX4I2, ESAM, BCAM
NTS, PAPLN, SPNS2, MYLK, C8orf4, ADGRF5, TM4SF18, TGM2, RP11-536O18.1, CXorf36
Negative: CSF1R, MS4A4A, CYBB, PLD4, MS4A6A, CD163, HCK, IGSF6, FOLR2, ADAP2
CD86, MS4A7, MARCH1, MRC1, F13A1, MPEG1, CD14, SPI1, HLA-DMB, GPR34
CD33, TGFBI, CLEC7A, TIMD4, CEBPA, SIGLEC1, CSF2RA, SLC15A3, LY86, AGR2
orig.RNA.seu <- RunUMAP(orig.RNA.seu, dims=1:40)
10:43:01 UMAP embedding parameters a = 0.9922 b = 1.112
10:43:01 Read 8321 rows and found 40 numeric columns
10:43:01 Using Annoy for neighbor search, n_neighbors = 30
10:43:01 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
10:43:03 Writing NN index file to temp file /tmp/RtmpsI55IG/file18a72231550b
10:43:03 Searching Annoy index using 1 thread, search_k = 3000
10:43:06 Annoy recall = 100%
10:43:08 Commencing smooth kNN distance calibration using 1 thread
10:43:10 Initializing from normalized Laplacian + noise
10:43:11 Commencing optimization for 500 epochs, with 367644 positive edges
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
10:43:32 Optimization finished
plotly::ggplotly(DimPlot(orig.RNA.seu, group.by="annotation"))
ggarrange(plotlist = map(cell.types[!cell.types %in% c( "NK","NA(1)","NA(3)","ILC3","SP (2)")] , ~ compareCluster(.x)), ncol=1) +
ggsave(paste0(outdir, "umap_clusters.png"), height = 30, width = 10)
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Quantifies the uncertainty of the prediction. Calculated differently for every method, but used to define which cells are “unassigned”.
orig.composition <- orig.RNA$annotation
orig.frac <- table(orig.composition)/length(orig.composition)
orig.frac.df <- data.frame(orig.frac) %>%
dplyr::rename(predicted.id=orig.composition, frac.label=Freq) %>%
mutate(method="original.RNA")
score_cols <- str_subset(colnames(atac.seu@meta.data), 'score_')
label_cols <- str_subset(colnames(atac.seu@meta.data), 'predicted.id_')
pred.labels.df <- imap(list(CCA=pred.cca, Liger=pred.liger, Conos=pred.conos), ~
rownames_to_column(.x, "cell") %>%
rename_all(funs(str_remove(., str_c("_",.y)))) %>%
mutate(method=.y)
) %>%
purrr::reduce(bind_rows) %>%
mutate(score=ifelse(is.na(score), 0, score))
predict_score_hist <-
pred.labels.df %>%
ggplot(aes(score, fill=method)) +
geom_histogram(position="identity", alpha=0.8, bins=40) +
facet_grid(method ~.) +
scale_fill_brewer(palette="Set1") +
xlab("Label prediction score") +
theme_bw(base_size = 16) +
theme(legend.position = "top")
cutoffs <- seq(0,1,0.05)
predict_score_cumedist <-
pred.labels.df %>%
group_by(method) %>%
mutate(bins=cut(score, breaks = cutoffs)) %>%
mutate(score=as.numeric(str_remove_all(as.character(bins), ".+,|]"))) %>%
ggplot(aes(score, color=method)) +
stat_ecdf(size=0.8, alpha=0.7) +
scale_color_brewer(palette = "Set1") +
ylab("Fraction of unassigned cells") +
xlab("Prediction score cutoff") +
theme_bw(base_size = 16) +
xlim(0,1) +
coord_fixed() +
guides(color="none")
ggpubr::ggarrange(predict_score_hist, predict_score_cumedist, common.legend = TRUE, widths = c(0.8, 1.2),
labels=c("A", "B")) +
ggsave(paste0(outdir, "prediction_score_distribution.png"), height = 6, width = 10)
Removed 47 rows containing non-finite values (stat_ecdf).
ggpubr::ggarrange(
plotlist = list(
FeaturePlot(atac.seu, reduction = "umap.snap", feature = "score_CCA" , coord.fixed = TRUE) + scale_color_viridis_c() + ggtitle("CCA"),
FeaturePlot(atac.seu, reduction = "umap.snap", feature = "score_Liger", coord.fixed = TRUE) + scale_color_viridis_c() + ggtitle("Liger"),
FeaturePlot(atac.seu, reduction = "umap.snap", feature = "score_Conos", coord.fixed = TRUE) + scale_color_viridis_c() + ggtitle("Conos")
),
common.legend = TRUE, ncol=3, nrow=1
) +
ggsave(paste0(outdir, "prediction_score_umaps.png"), height = 7, width=14)
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Compare cell type fractions (w uncertainty)
pred.labels.df %>%
group_by(method) %>%
drop_na() %>%
mutate(tot.cells=n()) %>%
ungroup() %>%
group_by(method, predicted.id) %>%
summarise(tot.label = n(), tot.cells = max(tot.cells), mean.score=mean(score)) %>%
mutate(frac.label=tot.label/tot.cells) %>%
bind_rows(orig.frac.df) %>%
mutate(orig.rank = orig.rank.df[predicted.id,]) %>%
mutate(predicted.id=factor(predicted.id, levels=rownames(orig.rank.df)))%>%
# select(method, predicted.id, frac.label) %>%
# distinct() %>%
ggplot(aes(predicted.id, frac.label, fill=mean.score, color=mean.score)) +
geom_point(size=2) +
geom_col(width=0.05) +
coord_flip() +
# geom_line(aes(group=method)) +
facet_wrap(method~., nrow=1, ncol=4, scales="free_x") +
scale_color_viridis_c() +
scale_fill_viridis_c() +
ylab("Fraction of cells") +
theme_bw(base_size = 16) +
ggsave(paste0(outdir, "cell_type_composition_bars.png"), width = 15, height = 5)
binding character and factor vector, coercing into character vector
Calculate which fractions of NNs in bin based graph of ATAC cells have the same annotation
k = 30
atac.seu <- FindNeighbors(atac.seu, assay = "ATAC", reduction = "SnapATAC", dims = 1:20, k.param = k)
Computing nearest neighbor graph
Computing SNN
atac.nn.list <- getNNlist(atac.seu)
knn.score.CCA <- test.knn(atac.nn.list, setNames(pred.cca.filtered$predicted.id_CCA, rownames(pred.cca.filtered)))
p-value will be approximate in the presence of ties
knn.score.conos <- test.knn(atac.nn.list, setNames(pred.conos.filtered$predicted.id_Conos, rownames(pred.conos.filtered)))
p-value will be approximate in the presence of ties
knn.score.liger <- test.knn(atac.nn.list, setNames(pred.liger.filtered$predicted.id_Liger, rownames(pred.liger.filtered)))
p-value will be approximate in the presence of ties
knn_score_df <-
list(CCA=knn.score.CCA, conos=knn.score.conos, liger=knn.score.liger) %>%
imap( ~ data.frame(KNN_score = .x$KNN_score, D=.x$D, p.val=.x$p.val, method=.y)) %>%
# imap( ~ data.frame(KNN_score = .x$KNN_score, cell= names(.x$KNN_score), D=.x$D, p.val=.x$p.val, method=.y)) %>%
purrr::reduce(bind_rows) %>%
dplyr::mutate(KNN_score=ifelse(is.na(KNN_score), 0, KNN_score)) %>%
mutate(data="true")
Unequal factor levels: coercing to characterbinding character and factor vector, coercing into character vectorbinding character and factor vector, coercing into character vectorbinding character and factor vector, coercing into character vector
knn_score_null_df <-
list(CCA=knn.score.CCA, conos=knn.score.conos, liger=knn.score.liger) %>%
imap( ~ data.frame(KNN_score = .x$null, D=.x$D, p.val=.x$p.val, method=.y)) %>%
# imap( ~ data.frame(KNN_score = .x$KNN_score, cell= names(.x$KNN_score), D=.x$D, p.val=.x$p.val, method=.y)) %>%
purrr::reduce(bind_rows) %>%
dplyr::mutate(KNN_score=ifelse(is.na(KNN_score), 0, KNN_score)) %>%
mutate(data="null")
Unequal factor levels: coercing to characterbinding character and factor vector, coercing into character vectorbinding character and factor vector, coercing into character vectorbinding character and factor vector, coercing into character vector
bind_rows(knn_score_df, knn_score_null_df) %>%
ggplot(aes(KNN_score, color=method)) +
stat_ecdf( aes(alpha=data), size=1) +
# stat_ecdf(data=. %>% filter(data=="true"), size=1) +
facet_grid(method~.) +
scale_alpha_discrete( range=c(0.5,1), name="") +
scale_color_brewer(palette = "Set1") +
geom_text(data= . %>% distinct(method, D, p.val),
x=1, y=0.05, hjust=1,
aes(label=glue("KNN score = {round(D, 3)}, p.value: {p.val}"), y=c(0.90, 0.95, 1))) +
theme_bw(base_size = 16) +
ylab("ECDF") + xlab("Fraction of KNNs with shared label") +
ggsave(paste(outdir,"KNN_score_ecdf_unionHVG.png"), height = 6, width=7)
Using alpha for a discrete variable is not advised.
cluster.prob <- (clust.size/length(pred.labels))
pred.labels.prob <- cluster.prob[pred.labels[which(!is.na(pred.labels))]]
(cluster.prob*k)
cluster.prob^k
length(pred.labels)
plot(knn.scores, knn.scores /as.numeric(pred.labels.prob))
hist( knn.scores - as.numeric(pred.labels.prob)*k)
Taking markers from Fig. S2 of JP’s manuscript
thymus.markers <- c("PTPRC", "CD3G", "TYROBP","CD19","HOXA9",'FXYD2',"SH3TC1","CCR9","CD8A", "CD8B","PDCD1", "CRTAM","CD40LG","CCR6","FOXP3","SOX13","ZNF683","KLRD1","TNFSF11","VPREB1","MS4A1", "CLEC9A", "CLEC10A", "LAMP3", "IL3RA", "FCGR3B", "C2","TPSB2",
'ITGA2B',"GYPA", "CDH5", "RGS5","CDH1", "PDGFRA","CRABP1")
# pbmc.markers <- c("CD79A", "MS4A1", "CD8A", "CD8B", "LYZ")
# thymus.markers <- list(Fb=c("PDGFRA", "COLEC11", "FBN1", "PI16"),
# VSMC=c("PDGFRB", 'ACTA2', "RGS5"),
# Endo=c("PECAM1", "CDH5","LYVE1"),
# TEC = c("EPCAM", "FOXN1", "CCL25", "CCL19")
# )
thymus.markers.df <- imap(thymus.markers, ~ data.frame(gene=.x, cell.type.class=.y)) %>%
purrr::reduce(bind_rows)
marker.access.df <- atac.seu@assays$ACTIVITY@data[intersect(thymus.markers, rownames(atac.seu@assays$ACTIVITY)),] %>%
as.matrix() %>%
reshape2::melt(varnames=c("gene", "cell"), value.name="log.counts") %>%
full_join(rownames_to_column(atac.seu@meta.data[, label_cols], "cell")) %>%
# full_join(thymus.markers.df) %>%
pivot_longer(cols=label_cols, names_to = "method", values_to = "predicted.id") %>%
dplyr::mutate(method=str_remove(method,".+_")) %>%
filter(method %in% c("CCA", "Liger", "Conos"))
ordered_cell_types <- c("DN", "DP (Q)", "DP (P)", "SP (1)", "NK", "ILC3", "DC", "Mac", "Ery", "Fib")
markers_pl <-
marker.access.df %>%
mutate(predicted.id = case_when(str_detect(predicted.id, "CD8") ~ "CD8+T",
# str_detect(predicted.id, "CD4") ~ "CD4+T",
TRUE ~ predicted.id
)
) %>%
mutate(predicted.id=factor(predicted.id, levels = ordered_cell_types)) %>%
group_by(method, predicted.id, gene) %>%
dplyr::mutate(frac.cells=sum(log.counts > 0)/n()) %>%
# filter(method=="CCA") %>%
ungroup() %>%
ggplot( aes( gene, predicted.id)) +
geom_point(aes(size=frac.cells, color=frac.cells)) +
facet_grid(method~., space="free", scales="free_x") +
scale_color_gradient(high="darkblue", low="white") +
# scale_color_viridis_c() +
theme_bw(base_size = 16) +
theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5),
strip.text.x = element_text(angle=45))
markers_pl
ggsave(paste0(outdir, "Thymus_markers_accessibility.png"), height = 16, width = 12)
Reproducing Fig.2H on T-cell development
tcells.markers.df %>%
full_join(t.cell.markers.df) %>%
# filter(method=="CCA") %>%
mutate(predicted.id=factor(predicted.id, levels=ordered.tcells)) %>%
ggplot(aes( predicted.id, gene)) +
facet_grid(cell.type.class~method, scales = "free_y", space="free") +
geom_point(aes(size=frac.cells, color=frac.cells)) +
# scale_color_gradient(high="darkblue", low="white") +
scale_color_viridis_c() +
# scale_color_gradient2(midpoint = 0.5) +
theme_bw(base_size = 16) +
theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5),
strip.text.y = element_text(angle=0))
Joining, by = "gene"
Column `gene` joining factor and character vector, coercing into character vector